For Pulse Ox. analysis, make sure the data file is in the right .csv format:
a) Headings must be on Row 1.
b) Open the csv file in Notepad or TextEdit and delete any extra
row commas (non-printable characters).
c) Dates must always be in Column A and Times in Column B.
d) There might be a row that says "Time Gap Present". Delete this row using Notepad
or TextEdit.
In [1]:
#the usual beginning
import pandas as pd
import numpy as np
from pandas import Series, DataFrame
from datetime import datetime, timedelta
from pandas import concat
In [2]:
# Put the path of the pulse-ox export to analyse here.
samefile = '/Users/John/Dropbox/LLU/ROP/Pulse Ox/ROP013PO.csv'


def _read_channel(path, column):
    """Load a single measurement column of the export, indexed by timestamp.

    - parse_dates combines the 'Date' and 'Time' columns into one
      'timestamp' column and parses it as a timestamp.
    - index_col makes that timestamp column the index.
    - usecols restricts the read to the date, the time and `column`.
    - na_values turns 0 readings into NaN.
    """
    return pd.read_csv(path,
                       parse_dates={'timestamp': ['Date', 'Time']},
                       index_col='timestamp',
                       usecols=['Date', 'Time', column],
                       na_values=['0'])


# One single-column frame per measurement.
df_PI = _read_channel(samefile, 'PI')
df_O2 = _read_channel(samefile, 'SpO2')
df_PR = _read_channel(samefile, 'PR')
In [3]:
# Exam window -- typed in by hand for each recording.
# SYNTAX:
# datetime(year, month, day[, hour[, minute[, second[, microsecond[,tzinfo]]]]])
X = datetime(2015, 10, 14, 9, 36)   # X = ROP Exam Started
Z = datetime(2015, 10, 14, 9, 40)   # Z = ROP Exam Ended

# Recording window -- taken from the SpO2 frame's index.
df_O2_first = df_O2.first_valid_index()   # first valid index label
df_O2_last = df_O2.last_valid_index()     # last valid index label
Y = pd.to_datetime(df_O2_first)   # Y = TIME DATA COLLECTION BEGAN / first data point on CSV
Q = pd.to_datetime(df_O2_last)    # Q = TIME DATA COLLECTION ENDED / last data point on CSV
In [4]:
# Baseline = everything from the first data point (Y) up to the start of the exam (X).
df0_PI, df0_O2, df0_PR = df_PI[Y:X], df_O2[Y:X], df_PR[Y:X]

# Mean of each baseline frame.
avg0_PI, avg0_O2, avg0_PR = df0_PI.mean(), df0_O2.mean(), df0_PR.mean()

# Parenthesised single argument: prints the same list whether print is a
# statement or a function.
print(['Baseline Avgs', avg0_PI, avg0_O2, avg0_PR])
In [5]:
#AVERAGE PI / SpO2 / PR DURING ROP EXAM FOR FIRST 4 MINUTES
# Set "_during_end" to "Z" if you want for full duration of exam
def perdelta(start, end, delta):
    """Return [start, start + delta, start + 2*delta, ...], stopping before `end`.

    `delta` must be positive, otherwise the loop never terminates.
    """
    stamps = []
    curr = start
    while curr < end:
        stamps.append(curr)
        curr += delta
    return stamps


def _window_means(frame, starts, window, name):
    """Return a Series called `name`, indexed by `starts`, that holds
    frame[t:(t + window)].mean() for every t in `starts`.

    NOTE(review): pandas label slices include both end points, so a sample
    lying exactly on a window boundary contributes to two adjacent windows.
    """
    means = Series(index=starts, name=name)
    for t in starts:
        means[t] = frame[t:(t + window)].mean()
    return means


_during_end = X + timedelta(minutes=4)

# PI
df1_PI = df_PI[X:_during_end]
win1_PI = timedelta(seconds=10) #any unit of time
r1_PI = perdelta(X, _during_end, win1_PI)
avg1_PI = _window_means(df1_PI, r1_PI, win1_PI, 'PI During')

# SpO2
df1_O2 = df_O2[X:_during_end]
win1_O2 = timedelta(seconds=10) #any unit of time
r1_O2 = perdelta(X, _during_end, win1_O2)
avg1_O2 = _window_means(df1_O2, r1_O2, win1_O2, 'SpO2 During')

# PR
df1_PR = df_PR[X:_during_end]
win1_PR = timedelta(seconds=10) #any unit of time
r1_PR = perdelta(X, _during_end, win1_PR)
avg1_PR = _window_means(df1_PR, r1_PR, win1_PR, 'PR During')

# One column per measurement, one row per window start.
result1 = concat([avg1_PI, avg1_O2, avg1_PR], axis=1, join='inner')
print(result1)
In [6]:
#AVERAGE PI / SpO2 / PR EVERY 5 MIN FOR THE HOUR STARTING AT END OF ROP EXAM (Z)
def perdelta(start, end, delta):
    """Return [start, start + delta, start + 2*delta, ...], stopping before `end`.

    `delta` must be positive, otherwise the loop never terminates.
    """
    stamps = []
    curr = start
    while curr < end:
        stamps.append(curr)
        curr += delta
    return stamps


def _window_means(frame, starts, window, name):
    """Return a Series called `name`, indexed by `starts`, that holds
    frame[t:(t + window)].mean() for every t in `starts`.

    NOTE(review): pandas label slices include both end points, so a sample
    lying exactly on a window boundary contributes to two adjacent windows.
    """
    means = Series(index=starts, name=name)
    for t in starts:
        means[t] = frame[t:(t + window)].mean()
    return means


_stage2_start = Z
_stage2_end = Z + timedelta(hours=1)

# PI
df2_PI = df_PI[_stage2_start:_stage2_end]
win2_PI = timedelta(minutes=5) #any unit of time
r2_PI = perdelta(_stage2_start, _stage2_end, win2_PI)
avg2_PI = _window_means(df2_PI, r2_PI, win2_PI, 'PI q 5 Min Hr 1-2')

# SpO2
df2_O2 = df_O2[_stage2_start:_stage2_end]
win2_O2 = timedelta(minutes=5) #any unit of time
r2_O2 = perdelta(_stage2_start, _stage2_end, win2_O2)
avg2_O2 = _window_means(df2_O2, r2_O2, win2_O2, 'SpO2 q 5 Min Hr 1-2')

# PR
df2_PR = df_PR[_stage2_start:_stage2_end]
win2_PR = timedelta(minutes=5) #any unit of time
r2_PR = perdelta(_stage2_start, _stage2_end, win2_PR)
avg2_PR = _window_means(df2_PR, r2_PR, win2_PR, 'PR q 5 Min Hr 1-2')

# One column per measurement, one row per window start.
result2 = concat([avg2_PI, avg2_O2, avg2_PR], axis=1, join='inner')
print(result2)
In [7]:
#
#AVERAGE PI / O2 / PR Q 15 MIN HR 2-3 ROP EXAM
#
def perdelta(start, end, delta):
    """Return [start, start + delta, start + 2*delta, ...], stopping before `end`.

    `delta` must be positive, otherwise the loop never terminates.
    """
    stamps = []
    curr = start
    while curr < end:
        stamps.append(curr)
        curr += delta
    return stamps


def _window_means(frame, starts, window, name):
    """Return a Series called `name`, indexed by `starts`, that holds
    frame[t:(t + window)].mean() for every t in `starts`.

    NOTE(review): pandas label slices include both end points, so a sample
    lying exactly on a window boundary contributes to two adjacent windows.
    """
    means = Series(index=starts, name=name)
    for t in starts:
        means[t] = frame[t:(t + window)].mean()
    return means


_stage3_start = Z + timedelta(hours=1)
_stage3_end = Z + timedelta(hours=2)

# PI
df3_PI = df_PI[_stage3_start:_stage3_end]
win3_PI = timedelta(minutes=15) #any unit of time
r3_PI = perdelta(_stage3_start, _stage3_end, win3_PI)
avg3_PI = _window_means(df3_PI, r3_PI, win3_PI, 'PI q 15min Hr 2-3')

# O2
df3_O2 = df_O2[_stage3_start:_stage3_end]
win3_O2 = timedelta(minutes=15) #any unit of time
r3_O2 = perdelta(_stage3_start, _stage3_end, win3_O2)
avg3_O2 = _window_means(df3_O2, r3_O2, win3_O2, 'O2 q 15min Hr 2-3')

# PR (the series used to be labelled 'O2 q 15min Hr 2-3' by mistake)
df3_PR = df_PR[_stage3_start:_stage3_end]
win3_PR = timedelta(minutes=15) #any unit of time
r3_PR = perdelta(_stage3_start, _stage3_end, win3_PR)
avg3_PR = _window_means(df3_PR, r3_PR, win3_PR, 'PR q 15min Hr 2-3')

# PI, O2 and PR side by side (the PR column used to be a second copy of PI).
result3 = concat([avg3_PI, avg3_O2, avg3_PR], axis=1, join='inner')
print(result3)
In [8]:
#
#AVERAGE PI / O2 / PR Q 30 MIN HR 3-4 ROP EXAM
#
def perdelta(start, end, delta):
    """Return [start, start + delta, start + 2*delta, ...], stopping before `end`.

    `delta` must be positive, otherwise the loop never terminates.
    """
    stamps = []
    curr = start
    while curr < end:
        stamps.append(curr)
        curr += delta
    return stamps


def _window_means(frame, starts, window, name):
    """Return a Series called `name`, indexed by `starts`, that holds
    frame[t:(t + window)].mean() for every t in `starts`.

    NOTE(review): pandas label slices include both end points, so a sample
    lying exactly on a window boundary contributes to two adjacent windows.
    """
    means = Series(index=starts, name=name)
    for t in starts:
        means[t] = frame[t:(t + window)].mean()
    return means


_stage4_start = Z + timedelta(hours=2)
_stage4_end = Z + timedelta(hours=3)

# PI
df4_PI = df_PI[_stage4_start:_stage4_end]
win4_PI = timedelta(minutes=30) #any unit of time
r4_PI = perdelta(_stage4_start, _stage4_end, win4_PI)
avg4_PI = _window_means(df4_PI, r4_PI, win4_PI, 'PI q 30min Hr 3-4')

# O2 (this section used to appear twice, verbatim; once is enough)
df4_O2 = df_O2[_stage4_start:_stage4_end]
win4_O2 = timedelta(minutes=30) #any unit of time
r4_O2 = perdelta(_stage4_start, _stage4_end, win4_O2)
avg4_O2 = _window_means(df4_O2, r4_O2, win4_O2, 'O2 q 30min Hr 3-4')

# PR
df4_PR = df_PR[_stage4_start:_stage4_end]
win4_PR = timedelta(minutes=30) #any unit of time
r4_PR = perdelta(_stage4_start, _stage4_end, win4_PR)
avg4_PR = _window_means(df4_PR, r4_PR, win4_PR, 'PR q 30min Hr 3-4')

# PI, O2 and PR side by side (the PR column used to be a second copy of PI).
result4 = concat([avg4_PI, avg4_O2, avg4_PR], axis=1, join='inner')
print(result4)
In [9]:
#
#AVERAGE PI / O2 / PR Q HR 4-24 Hrs ROP EXAM
#
def perdelta(start, end, delta):
    """Return [start, start + delta, start + 2*delta, ...], stopping before `end`.

    `delta` must be positive, otherwise the loop never terminates.
    """
    stamps = []
    curr = start
    while curr < end:
        stamps.append(curr)
        curr += delta
    return stamps


def _window_means(frame, starts, window, name):
    """Return a Series called `name`, indexed by `starts`, that holds
    frame[t:(t + window)].mean() for every t in `starts`.

    NOTE(review): pandas label slices include both end points, so a sample
    lying exactly on a window boundary contributes to two adjacent windows.
    """
    means = Series(index=starts, name=name)
    for t in starts:
        means[t] = frame[t:(t + window)].mean()
    return means


_stage5_start = Z + timedelta(hours=3)
_stage5_end = Z + timedelta(hours=24)

# PI
df5_PI = df_PI[_stage5_start:_stage5_end]
win5_PI = timedelta(minutes=60) #any unit of time
r5_PI = perdelta(_stage5_start, _stage5_end, win5_PI)
avg5_PI = _window_means(df5_PI, r5_PI, win5_PI, 'PI q 1 Hr 4-24')

# O2
df5_O2 = df_O2[_stage5_start:_stage5_end]
win5_O2 = timedelta(minutes=60) #any unit of time
r5_O2 = perdelta(_stage5_start, _stage5_end, win5_O2)
avg5_O2 = _window_means(df5_O2, r5_O2, win5_O2, 'O2 q 1 Hr 4-24')

# PR
df5_PR = df_PR[_stage5_start:_stage5_end]
win5_PR = timedelta(minutes=60) #any unit of time
r5_PR = perdelta(_stage5_start, _stage5_end, win5_PR)
avg5_PR = _window_means(df5_PR, r5_PR, win5_PR, 'PR q 1 Hr 4-24')

# PI, O2 and PR side by side (the PR column used to be a second copy of PI).
result5 = concat([avg5_PI, avg5_O2, avg5_PR], axis=1, join='inner')
print(result5)
In [10]:
#export out all averages to a tab-delimited file, one average per row
import csv


class excel_tab(csv.excel):
    """The csv 'excel' dialect with a tab as the field delimiter."""
    delimiter = '\t'


csv.register_dialect("excel_tab", excel_tab)

# Output order: for each measurement (PI, then SpO2, then PR) the baseline
# average, followed by the window averages of stages 1 to 5.
# Each stage is (window start times, frame sliced to the stage, window length).
_export_plan = [
    (avg0_PI, [(r1_PI, df1_PI, win1_PI),
               (r2_PI, df2_PI, win2_PI),
               (r3_PI, df3_PI, win3_PI),
               (r4_PI, df4_PI, win4_PI),
               (r5_PI, df5_PI, win5_PI)]),
    (avg0_O2, [(r1_O2, df1_O2, win1_O2),
               (r2_O2, df2_O2, win2_O2),
               (r3_O2, df3_O2, win3_O2),
               (r4_O2, df4_O2, win4_O2),
               (r5_O2, df5_O2, win5_O2)]),
    (avg0_PR, [(r1_PR, df1_PR, win1_PR),
               (r2_PR, df2_PR, win2_PR),
               (r3_PR, df3_PR, win3_PR),
               (r4_PR, df4_PR, win4_PR),
               (r5_PR, df5_PR, win5_PR)]),
]

with open('ROP013_POAvgs.csv', 'w') as f: #CHANGE CSV FILE NAME
    writer = csv.writer(f, dialect=excel_tab)
    for _baseline, _stages in _export_plan:
        writer.writerow(_baseline)
        for _starts, _frame, _window in _stages:
            for _t in _starts:
                # The window mean is recomputed from the sliced frame.
                writer.writerow(_frame[_t:(_t + _window)].mean())
In [11]:
#pre exam desaturation run analysis
def _spo2_band(v):
    """Classify one SpO2 reading.

    Returns 'below' for v <= 80, 'middle' for 81 <= v <= 84, 'above' for
    85 <= v <= 89 and None for anything else (90 and up, NaN, or a value
    that falls between two bands).
    """
    if v <= 80:
        return 'below'
    if 81 <= v <= 84:
        return 'middle'
    if 85 <= v <= 89:
        return 'above'
    return None


def _desat_runs(values, min_len=5):
    """Collect runs of consecutive readings that stay inside one band.

    A run is kept only when it holds at least `min_len` readings (5 readings
    stand for 10 seconds, because data points are every 2). The result maps
    each band name to a dict {0: run, 1: run, ...} of the kept runs in order
    of appearance.

    A run is closed whenever the next reading belongs to a different band or
    to no band at all. Runs closed by an out-of-band reading (e.g. SpO2
    recovering above 89, or a NaN) used to be thrown away without being
    counted; they are now kept like any other run.
    """
    runs = {'below': {}, 'middle': {}, 'above': {}}
    current = []
    current_band = None
    for v in values:
        band = _spo2_band(v)
        if band is not None and band == current_band:
            current.append(v)
            continue
        # The band changed (or was left): close the run in progress.
        if current_band is not None and len(current) >= min_len:
            bucket = runs[current_band]
            bucket[len(bucket)] = current
        current = [] if band is None else [v]
        current_band = band
    #final list check
    if current_band is not None and len(current) >= min_len:
        bucket = runs[current_band]
        bucket[len(bucket)] = current
    return runs


df_O2_pre = df_O2[Y:X]
_pre_runs = _desat_runs(df_O2_pre['SpO2'].values)

b_dict = _pre_runs['below']    # v <= 80
m_dict = _pre_runs['middle']   # v >= 81 and v <= 84
a_dict = _pre_runs['above']    # v >= 85 and v <= 89

#Find count of these ranges
below = len(b_dict)
middle = len(m_dict)
above = len(a_dict)

# Total number of readings inside the kept runs of each band (as floats).
b_len = float(sum(len(run) for run in b_dict.values()))
m_len = float(sum(len(run) for run in m_dict.values()))
a_len = float(sum(len(run) for run in a_dict.values()))
In [12]:
#post exam desaturation run analysis
def _spo2_band(v):
    """Classify one SpO2 reading.

    Returns 'below' for v <= 80, 'middle' for 81 <= v <= 84, 'above' for
    85 <= v <= 89 and None for anything else (90 and up, NaN, or a value
    that falls between two bands).
    """
    if v <= 80:
        return 'below'
    if 81 <= v <= 84:
        return 'middle'
    if 85 <= v <= 89:
        return 'above'
    return None


def _desat_runs(values, min_len=5):
    """Collect runs of consecutive readings that stay inside one band.

    A run is kept only when it holds at least `min_len` readings (5 readings
    stand for 10 seconds, because data points are every 2). The result maps
    each band name to a dict {0: run, 1: run, ...} of the kept runs in order
    of appearance.

    A run is closed whenever the next reading belongs to a different band or
    to no band at all. Runs closed by an out-of-band reading (e.g. SpO2
    recovering above 89, or a NaN) used to be thrown away without being
    counted; they are now kept like any other run.
    """
    runs = {'below': {}, 'middle': {}, 'above': {}}
    current = []
    current_band = None
    for v in values:
        band = _spo2_band(v)
        if band is not None and band == current_band:
            current.append(v)
            continue
        # The band changed (or was left): close the run in progress.
        if current_band is not None and len(current) >= min_len:
            bucket = runs[current_band]
            bucket[len(bucket)] = current
        current = [] if band is None else [v]
        current_band = band
    #final list check
    if current_band is not None and len(current) >= min_len:
        bucket = runs[current_band]
        bucket[len(bucket)] = current
    return runs


df_O2_post = df_O2[Z:Q]
_post_runs = _desat_runs(df_O2_post['SpO2'].values)

b_dict2 = _post_runs['below']    # v <= 80
m_dict2 = _post_runs['middle']   # v >= 81 and v <= 84
a_dict2 = _post_runs['above']    # v >= 85 and v <= 89

#Find count of these ranges
below2 = len(b_dict2)
middle2 = len(m_dict2)
above2 = len(a_dict2)

# Total number of readings inside the kept runs of each band (as floats).
b_len2 = float(sum(len(run) for run in b_dict2.values()))
m_len2 = float(sum(len(run) for run in m_dict2.values()))
a_len2 = float(sum(len(run) for run in a_dict2.values()))
In [13]:
#print results from count and min
# Report, for the pre-exam and post-exam periods, how many desaturation runs
# were kept in each SpO2 band and how long they lasted in total.
# Each duration is (readings in kept runs * 2) / 60, i.e. minutes under the
# assumption of one reading every 2 seconds -- TODO confirm the sampling
# interval for each recording.
# NOTE(review): these are Python 2 print statements; the comma-separated
# form is kept as-is because the exact spacing of the output depends on it.
print "Desat Counts for X mins\n"
print "Pre Mild Desat (85-89) Count: %s\t" %above, "for %s min" %((a_len*2)/60.)
print "Pre Mod. Desat (81-84) Count: %s\t" %middle, "for %s min" %((m_len*2)/60.)
print "Pre Sev. Desat (=< 80) Count: %s\t" %below, "for %s min\n" %((b_len*2)/60.)
print "Post Mild Desat (85-89) Count: %s\t" %above2, "for %s min" %((a_len2*2)/60.)
print "Post Mod Desat (81-84) Count: %s\t" %middle2, "for %s min" %((m_len2*2)/60.)
print "Post Sev Desat (=< 80) Count: %s\t" %below2, "for %s min\n" %((b_len2*2)/60.)
# Lengths of the recording relative to the exam window, as time differences.
print "Data Recording Time!"
print '*' * 10
print "Pre-Exam Data Recording Length\t", X - Y # start of exam - first data point
print "Post-Exam Data Recording Length\t", Q - Z #last data point - end of exam
print "Total Data Recording Length\t", Q - Y #last data point - first data point
In [14]:
#did the count sort the runs into the correct bands?
# Prints one True/False per stored pre-exam run: True when every reading of
# the run lies inside the band it was filed under.
# The "Mild" and "Severe" headings used to be swapped: b_dict holds the
# <= 80 (severe) runs and a_dict the 85-89 (mild) runs.
print("Severe check")
for run in b_dict.values():
    print(all(reading <= 80 for reading in run))
print("Moderate check")
for run in m_dict.values():
    print(all(81 <= reading <= 84 for reading in run))
print("Mild check")
for run in a_dict.values():
    print(all(85 <= reading <= 89 for reading in run))